library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
# Load the shootings dataset into `data`; every plot below reads from it.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# script only runs where the CSV exists at exactly this location.
data <- read.csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\Alekya\\ait 580\\shootings.csv')
# Preview the first rows to sanity-check the import and the column names.
head(data)
##   X INCIDENT_KEY OCCUR_DATE OCCUR_TIME     BORO LOC_OF_OCCUR_DESC PRECINCT
## 1 0    228798151 05/27/2021   21:30:00   QUEENS           UNKNOWN      105
## 2 1    137471050 06/27/2014   17:40:00    BRONX           UNKNOWN       40
## 3 2    147998800 11/21/2015   03:56:00   QUEENS           UNKNOWN      108
## 4 3    146837977 10/09/2015   18:30:00    BRONX           UNKNOWN       44
## 5 4     58921844 02/19/2009   22:58:00    BRONX           UNKNOWN       47
## 6 5    219559682 10/21/2020   21:36:00 BROOKLYN           UNKNOWN       81
##   JURISDICTION_CODE LOC_CLASSFCTN_DESC LOCATION_DESC STATISTICAL_MURDER_FLAG
## 1               0.0            UNKNOWN       UNKNOWN                   False
## 2               0.0            UNKNOWN       UNKNOWN                   False
## 3               0.0            UNKNOWN       UNKNOWN                    True
## 4               0.0            UNKNOWN       UNKNOWN                   False
## 5               0.0            UNKNOWN       UNKNOWN                    True
## 6               0.0            UNKNOWN       UNKNOWN                    True
##   PERP_AGE_GROUP PERP_SEX PERP_RACE VIC_AGE_GROUP VIC_SEX       VIC_RACE
## 1        UNKNOWN  UNKNOWN   UNKNOWN         18-24       M          BLACK
## 2        UNKNOWN  UNKNOWN   UNKNOWN         18-24       M          BLACK
## 3        UNKNOWN  UNKNOWN   UNKNOWN         25-44       M          WHITE
## 4        UNKNOWN  UNKNOWN   UNKNOWN           <18       M WHITE HISPANIC
## 5          25-44        M     BLACK         45-64       M          BLACK
## 6        UNKNOWN  UNKNOWN   UNKNOWN         25-44       M          BLACK
##   X_COORD_CD Y_COORD_CD           Latitude          Longitude
## 1    1058925   180924.0 40.662964620000025 -73.73083868899994
## 2    1005028   234516.0  40.81035186300005 -73.92494232599995
## 3    1007668   209836.5  40.74260663300004 -73.91549174199997
## 4    1006537   244511.1  40.83778200300002 -73.91945661499993
## 5    1024922   262189.4  40.88623791800006 -73.85290950899997
## 6    1004234   186461.7 40.678456718000064 -73.92795224099996
##                                         Lon_Lat
## 1 POINT (-73.73083868899994 40.662964620000025)
## 2  POINT (-73.92494232599995 40.81035186300006)
## 3  POINT (-73.91549174199997 40.74260663300004)
## 4  POINT (-73.91945661499994 40.83778200300003)
## 5  POINT (-73.85290950899997 40.88623791800006)
## 6 POINT (-73.92795224099996 40.678456718000064)

# Univariate Analysis

# Convert the murder flag to a factor so it is treated as a discrete variable
# for the x axis and the fill scale (the levels themselves are not renamed).
data$STATISTICAL_MURDER_FLAG <- factor(data$STATISTICAL_MURDER_FLAG)

# Bar plot for the distribution of homicide cases.
# geom_bar() counts the rows per flag value; geom_text() uses the same "count"
# stat so that every bar is labelled with its own height.
ggplot(data, aes(x = STATISTICAL_MURDER_FLAG, fill = STATISTICAL_MURDER_FLAG)) +
  geom_bar() +
  # after_stat(count) replaces the `..count..` dot-dot notation, which was
  # deprecated in ggplot2 3.4.0.
  geom_text(
    stat = "count", aes(label = after_stat(count)),
    vjust = -0.5, fontface = "bold"
  ) +
  labs(
    title = "Distribution of Homicide Cases",
    x = "Statistical Murder Flag", y = "Count"
  ) +
  # Colours are keyed by the flag's level names ("False" / "True").
  scale_fill_manual(
    values = c("False" = "#FF9999", "True" = "#3399FF"),
    name = "Homicide Flag"
  ) +
  theme_minimal()
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Count the incidents per borough and reshape the table into a two-column
# data frame (borough name, number of incidents) for plotting.
boro_counts <- table(data$BORO)
boro_df <- as.data.frame(boro_counts)
names(boro_df) <- c("BORO", "Count")

# Define a custom color palette, keyed by borough name
my_colors <- c(
  "BRONX" = "#99FFFF", "BROOKLYN" = "#FFCCCC", "MANHATTAN" = "#66CCCC",
  "QUEENS" = "#FFFF99", "STATEN ISLAND" = "#CCCCFF"
)

# Plotting the pie chart: a single stacked bar (x = "") wrapped onto polar
# coordinates, so each borough's slice angle is proportional to its count.
ggplot(boro_df, aes(x = "", y = Count, fill = BORO)) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) +
  theme_void() +
  labs(title = "Frequency of Incidents by Borough") +
  theme(legend.title = element_blank()) +
  scale_fill_manual(values = my_colors) +
  # Label each slice with its share of all incidents, centred in the slice.
  # fontface is a fixed setting, not a data mapping, so it sits outside aes().
  geom_text(
    aes(label = paste0(round(Count / sum(Count) * 100, 1), "%")),
    fontface = "bold",
    position = position_stack(vjust = 0.5)
  )

# Prepare the data for plotting: cross-tabulate borough against the murder
# flag and flatten the table to one row per (borough, flag) pair.
# as.data.frame() on a table already returns the two classifying columns as
# factors, so no further conversion is needed.
data_count <- as.data.frame(table(data$BORO, data$STATISTICAL_MURDER_FLAG))
names(data_count) <- c("BORO", "STATISTICAL_MURDER_FLAG", "Count")

# Define custom colors, keyed by the flag's level names
color_map <- c("False" = "#2ca02c", "True" = "#ff7f0e")

# Create a grouped bar plot: one pair of bars (non-homicide / homicide) per
# borough, each labelled with its count.
ggplot(data_count, aes(x = BORO, y = Count, fill = STATISTICAL_MURDER_FLAG)) +
  geom_col(position = position_dodge()) +
  # Dodge the labels by the same width as the bars so they sit above them.
  geom_text(
    aes(label = Count),
    vjust = -0.3, fontface = "bold",
    position = position_dodge(width = 0.9)
  ) +
  # The legend title is set here only. A `fill =` entry in labs() would be
  # ignored because an explicit scale name takes precedence over it.
  scale_fill_manual(values = color_map, name = "Homicide Flag") +
  labs(title = "Location vs. Homicide Cases", x = "Borough", y = "Count") +
  theme_minimal()

# Age brackets to report, in display order (youngest to oldest).
age_groups <- c("<18", "18-24", "25-44", "45-64", "65+")

# Counting the values for both perpetrator and victim age groups.
# Restricting the factor levels to `age_groups` keeps the brackets in order,
# reports a zero for any bracket that never occurs, and drops every other
# value (e.g. "UNKNOWN") from the tally.
perp_age_counts <- table(factor(data$PERP_AGE_GROUP, levels = age_groups))
vic_age_counts <- table(factor(data$VIC_AGE_GROUP, levels = age_groups))

# Creating the grouped bar plot using Plotly.
# The x values are passed as a factor with explicit levels so that the axis
# follows `age_groups` rather than whatever ordering plotly applies to plain
# character vectors; this mirrors how the race plots fix their bar order.
fig <- plot_ly() %>%
  add_trace(
    x = factor(names(perp_age_counts), levels = age_groups),
    y = as.numeric(perp_age_counts),
    type = "bar", name = "Perpetrator",
    marker = list(color = "#99CC99")
  ) %>%
  add_trace(
    x = factor(names(vic_age_counts), levels = age_groups),
    y = as.numeric(vic_age_counts),
    type = "bar", name = "Victim",
    marker = list(color = "#9999FF")
  ) %>%
  layout(
    title = "Distribution of Perpetrator and Victim Age Groups",
    xaxis = list(title = "Age Group"),
    yaxis = list(title = "Count"),
    barmode = "group"
  )

# Show plot
fig
# Perpetrator race distribution.
# Tally incidents per known perpetrator race (rows recorded as "UNKNOWN" are
# excluded), most frequent first, then freeze that order as the factor levels
# so the bars are drawn from largest to smallest.
perp_race_counts <- data %>%
  filter(PERP_RACE != "UNKNOWN") %>%
  count(PERP_RACE, sort = TRUE) %>%
  mutate(PERP_RACE = factor(PERP_RACE, levels = PERP_RACE))

bar_color_perp <- "palevioletred"

# Single-trace bar chart; hovering over a bar shows only its count.
fig_perp <- plot_ly(
  data = perp_race_counts,
  x = ~PERP_RACE,
  y = ~n,
  type = "bar",
  name = "Perpetrator Race",
  marker = list(color = bar_color_perp),
  hoverinfo = "y"
) %>%
  layout(
    title = "Distribution of Perpetrator Race",
    xaxis = list(title = "Race"),
    yaxis = list(title = "Count")
  )

# Render the perpetrator race chart
fig_perp
# Victim race distribution.
# Tally incidents per known victim race (rows recorded as "UNKNOWN" are
# excluded), most frequent first, then freeze that order as the factor levels
# so the bars are drawn from largest to smallest.
vic_race_counts <- data %>%
  filter(VIC_RACE != "UNKNOWN") %>%
  count(VIC_RACE, sort = TRUE) %>%
  mutate(VIC_RACE = factor(VIC_RACE, levels = VIC_RACE))

bar_color_vic <- "tomato"

# Single-trace bar chart; hovering over a bar shows only its count.
fig_vic <- plot_ly(
  data = vic_race_counts,
  x = ~VIC_RACE,
  y = ~n,
  type = "bar",
  name = "Victim Race",
  marker = list(color = bar_color_vic),
  hoverinfo = "y"
) %>%
  layout(
    title = "Distribution of Victim Race",
    xaxis = list(title = "Race"),
    yaxis = list(title = "Count")
  )

# Render the victim race chart
fig_vic